Impact of Dietary Shifts on Gut Microbiome Dynamics

Multivariate Insights Using R

R for Bio Data Analysis

Group 16: Eric Torres, Lucia de Lamadrid, Konstantina Gkopi, Elena Iriondo and Jorge Santiago

2024-12-03

Introduction

Objective and Main Research Questions

  • Metagenomics study focusing on an animal model of the human gut ecosystem obtained by transplanting fresh human fecal microbial communities into germ-free C57BL/6J mice.

  • Measuring the effects of diet (Western or Low-Fat Plant Polysaccharide-rich), temporal and spatial composition of the microbiota, colonisation history…

Our aim:

To study the relationship between the composition of the gut microbiota and factors such as diet and colonisation history.

Main Research Questions:

  • Modelling of the Firmicutes-Bacteroidetes ratio

  • Deciphering key factors shaping ecosystem composition through PCA and clustering analysis

  • Evaluating the effect of diet on microbial biodiversity

Materials and Methods

General Workflow

# A tibble: 6 × 6,701
   Diet Source Donor CollectionMet   Sex     OTU0     OTU1     OTU2     OTU3
  <dbl>  <dbl> <dbl>         <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
1     0      0     0             0     0 1.56e-11 4.72e-11 1.23e-11 4.52e-11
2     0      1     0             0     0 2.36e-11 9.53e-11 3.33e-11 2.67e-11
3     0      2     0             1     0 6.77e-11 3.68e-11 8.02e-11 5.49e-11
4     0      2     0             0     0 5.52e-11 9.89e-11 4.58e-11 3.54e-11
5     0      3     0             0     0 5.24e-11 6.34e-11 2.35e-11 7.47e-11
6     0      4     0             1     0 7.67e-11 7.22e-11 5.41e-11 1.20e-11
# ℹ 6,692 more variables: OTU4 <dbl>, OTU5 <dbl>, OTU6 <dbl>, OTU7 <dbl>,
#   OTU8 <dbl>, OTU9 <dbl>, OTU10 <dbl>, OTU11 <dbl>, OTU12 <dbl>, OTU13 <dbl>,
#   OTU14 <dbl>, OTU15 <dbl>, OTU16 <dbl>, OTU17 <dbl>, OTU18 <dbl>,
#   OTU19 <dbl>, OTU20 <dbl>, OTU21 <dbl>, OTU22 <dbl>, OTU23 <dbl>,
#   OTU24 <dbl>, OTU25 <dbl>, OTU26 <dbl>, OTU27 <dbl>, OTU28 <dbl>,
#   OTU29 <dbl>, OTU30 <dbl>, OTU31 <dbl>, OTU32 <dbl>, OTU33 <dbl>,
#   OTU34 <dbl>, OTU35 <dbl>, OTU36 <dbl>, OTU37 <dbl>, OTU38 <dbl>, …

Data Tidying and Filtering

  • Added a SampleID column to uniquely identify each sample.

  • Transformed the dataset from wide to long format for easier analysis.

  • Filtering Low-Abundance OTUs: Retained OTUs contributing up to 95% of cumulative abundance.

  • Replaced the numeric codes with descriptive labels.

# Give every row (sample) a sequential identifier and make it the first column.
# row_number() numbers rows by position; it does not read any existing column.
metadata_df <- metadata_df |>
  mutate(SampleID = row_number()) |>
  relocate(SampleID)  # without .before/.after, relocate() moves to the front

# Reshape wide -> long: every column whose name starts with "OTU" is stacked,
# with the column name stored in `OTU` and its value in `rel_abundance`.
metadata_df_long <- pivot_longer(
  metadata_df,
  cols = starts_with("OTU"),
  names_to = "OTU",
  values_to = "rel_abundance"
)

head(metadata_df_long)

# Rank OTUs by mean relative abundance (highest first) and compute, for each
# rank, the running share of the summed mean abundance covered so far.
cumulative_otus <- metadata_df_long |>
  group_by(OTU) |>
  summarize(mean_abundance = mean(rel_abundance)) |>
  arrange(desc(mean_abundance)) |>
  mutate(cumulative_abundance = cumsum(mean_abundance) / sum(mean_abundance))

# Names of the OTUs whose cumulative share does not exceed 95%
otus_to_keep <- cumulative_otus |>
  filter(cumulative_abundance <= 0.95) |>
  pull(OTU)

# Apply the filter to the long table. This step was missing from the snippet:
# `otus_to_keep` was computed but never used, and `filtered_metadata` (read
# below) was never created.
filtered_metadata <- metadata_df_long |>
  filter(OTU %in% otus_to_keep)

# Number of OTUs before filtering
n_total_otus <- metadata_df_long |>
  pull(OTU) |>
  n_distinct()

# Number of OTUs after filtering
n_filtered_otus <- filtered_metadata |>
  pull(OTU) |>
  n_distinct()

# Replace the numeric codes of the five metadata columns with descriptive
# labels. Each case_when() reads only the column it overwrites, so all five
# recodes can share a single mutate(). Codes not listed below become NA.
# NOTE(review): `filtered_metadata_stricter` is not created in this snippet;
# presumably a more strictly filtered version of the long table - confirm.
filtered_metadata_stricter_label <- mutate(
  filtered_metadata_stricter,
  Diet = case_when(
    Diet == 0 ~ "LFPP",
    Diet == 1 ~ "Western",
    Diet == 2 ~ "CARBR",
    Diet == 3 ~ "FATR",
    Diet == 4 ~ "Suckling",
    Diet == 5 ~ "Human"
  ),
  Source = case_when(
    Source == 0 ~ "Cecum1",
    Source == 1 ~ "Cecum2",
    Source == 2 ~ "Colon1",
    Source == 3 ~ "Colon2",
    Source == 4 ~ "Feces",
    Source == 5 ~ "SI1",
    Source == 6 ~ "SI13",
    Source == 7 ~ "SI15",
    Source == 8 ~ "SI2",
    Source == 9 ~ "SI5",
    Source == 10 ~ "SI9",
    Source == 11 ~ "Stomach",
    Source == 12 ~ "Cecum"
  ),
  Donor = case_when(
    Donor == 0 ~ "HMouseLFPP",
    Donor == 1 ~ "CONVR",
    Donor == 2 ~ "Human",
    Donor == 3 ~ "Fresh",
    Donor == 4 ~ "Frozen",
    Donor == 5 ~ "HMouseWestern",
    Donor == 6 ~ "CONVD"
  ),
  CollectionMet = case_when(
    CollectionMet == 0 ~ "Contents",
    CollectionMet == 1 ~ "Scraping"
  ),
  Sex = case_when(
    Sex == 0 ~ "Male",
    Sex == 1 ~ "Female"
  )
)
head(filtered_metadata_stricter_label)

Our data is tidy… and ready to be augmented!

We will use the OTU taxonomy file to add the phylum and class of each OTU as new columns, using left_join.

# Long, filtered and labelled sample table (tab-separated)
clean_df <- read_tsv("../data/02_metadata_long_filtered_label.tsv")
head(clean_df)

# OTU taxonomy table. Despite the .tsv extension it is parsed with a comma
# separator, exactly as in the original call.
otu_df_original <- read.table(
  file = "../data/01_data_otu.tsv",
  header = TRUE,
  sep = ","
)
head(otu_df_original)
  OTU.ID  Kingdom        Phylum         Class           Order
1   OTU0 Bacteria                                            
2   OTU1 Bacteria    Firmicutes    Clostridia   Clostridiales
3   OTU2 Bacteria    Firmicutes       Bacilli Lactobacillales
4   OTU3 Bacteria Bacteroidetes Bacteroidetes   Bacteroidales
5   OTU4 Bacteria Bacteroidetes                              
6   OTU5 Bacteria    Firmicutes    Clostridia   Clostridiales
              Family           Genus X X.1
1                                         
2    Ruminococcaceae                      
3    Enterococcaceae    Enterococcus      
4 Porphyromonadaceae Parabacteroides      
5                                         
6                                         
# Augment each row with the taxonomy of its OTU: match `OTU` in the sample
# table to `OTU.ID` in the taxonomy table, then place Phylum and Class right
# after the OTU column.
# NOTE(review): `otu_df_modified` is not created in this snippet; presumably a
# cleaned version of `otu_df_original` - confirm.
clean_df_augm <- left_join(
  clean_df,
  otu_df_modified,
  by = join_by(OTU == OTU.ID)
) |>
  relocate(Phylum, Class, .after = OTU)
head(clean_df_augm)

Results

Microbiota composition in terms of phyla in different:

  • sources and diet types

  • diet and donor combination

05

Principal Component Analysis on Phylum-Level Aggregated Microbiome Data

# Sum relative abundances per sample and phylum.
# Only rows with Donor == "Fresh" are kept. Per the original authors' note,
# these are the first-generation humanised mice, which only follow the LFPP or
# Western diet, so no additional filter on Diet is applied.
fresh_only <- filter(clean_df_augm, Donor == "Fresh")
aggregated_data <- fresh_only |>
  group_by(SampleID, Phylum, Diet) |>
  summarize(rel_abundance = sum(rel_abundance), .groups = "drop")

# One row per sample and one column per phylum, ready for PCA.
# PCA needs numeric input, so Diet is recoded as a single binary column
# (LFPP = 0, Western = 1); any other value becomes NA.
aggregated_wide <- aggregated_data |>
  pivot_wider(names_from = Phylum, values_from = rel_abundance) |>
  mutate(
    Diet = case_when(
      Diet == "LFPP" ~ 0,
      Diet == "Western" ~ 1
    )
  )

# Check of the aggregated data
head(aggregated_wide)

Principal Component Analysis on Phylum-Level Aggregated Microbiome Data

Analysis of Microbiome Clusters by Donor Groups Using Hierarchical Clustering

# Load the filtered, labelled metadata in wide format (one column per OTU)
filtered_metadata_wider <- read_tsv("../data/02_metadata_wide_filtered_label.tsv")

# Keep only the OTU abundance columns
otu_data <- select(filtered_metadata_wider, starts_with("OTU"))

# Centre and scale every OTU column; scale() returns a matrix, so convert it
# back to a tibble for tidyverse compatibility
otu_data_scaled <- otu_data |>
  scale() |>
  as_tibble()

# Scaled OTU values together with the donor label of each row
otu_data_with_metadata <- otu_data_scaled |>
  mutate(Donor = pull(filtered_metadata_wider, Donor))

# Pairwise distances between rows (dist() defaults to Euclidean)
dist_matrix <- dist(otu_data_scaled)

# Agglomerative clustering with the "ward.D2" linkage
hclust_result <- hclust(dist_matrix, method = "ward.D2")

# Cut the dendrogram into 3 clusters; as_tibble() stores the assignments in a
# column called `value`, which is renamed to `Cluster`
cluster_assignments <- cutree(hclust_result, k = 3)
cluster_labels <- cluster_assignments |>
  as_tibble() |>
  rename(Cluster = value)

# Attach the cluster assignment of each row to the full metadata table
clustered_metadata <- filtered_metadata_wider |>
  mutate(Cluster = pull(cluster_labels, Cluster))

07

Discussion

  • The “Obesity-inducing” diet influences the Firmicutes-Bacteroidetes ratio
  • PCA shows how diet shapes microbial composition, as well as the relationship between different phyla.

  • Clustering sheds light on how the microbiota donor structures the data

  • The Western diet favours a more biodiverse gut ecosystem